getwd()
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))

## Packages
set.seed(717)
library(cubature)
library(lava) 
library(ggplot2)
library(mcGlobaloptim) 
library(DiceKriging) 
library(nloptr) 
library(MASS)
library(mcmc)
library(geoR)
library(RobustCalibration)
library(lhs)
library(RobustGaSP)
library(numDeriv)
library(ggplot2)
library(rTensor)
library(plotly)
library(tensorA)
library(proxy)
library(abind)
library(scatterplot3d)
library(Matrix)

load("s1-ini-setting.RData")
load("s1.pre.to.list.RData")

## Model: f(x) = B*U1*U2*x; B: 3*3*2, U1/2: 3*3, x:3*2
dim.b = c(3,3,3); dim.u1 = c(2,3); dim.u2 = c(4,3); dim.x = c(2,3)
dim.f = c(2,4,2); t1 = dim.f[1]; t2 = dim.f[2]; t3 = dim.f[3]; 
dim.h = prod(dim.f); dim.mode = length(dim.f)
dim.s = c(2,4,2); s1 = dim.s[1]; s2 = dim.s[2]; s3 = dim.s[3]; 
d = 3; lower.x = rep(0,d); upper.x = rep(1,d)

B <- e1.ini.set$B
U_mat <- e1.ini.set$U_mat
V <- e1.ini.set$V

true.model <- function(x){
  X1 = sin(5*x); X2 = cos(x)
  X = matrix(cbind(X1,X2),dim.x)
  return(array(ttm(V, X, m = 3)@data,dim.f))
}
h <- function(x) sum(true.model(x))

x.star = directL(function(x0) -h(x0),lower.x,upper.x,control=list(xtol_rel=1e-8, maxeval=1000))$par
x.star = t(as.matrix(x.star))
t.star = true.model(x.star); h.star = h(x.star)


## Kernel
norm0 <- function(x1,x2) as.matrix(dist(x1,x2,method = "Euclidean"))
norm1 <- function(x1,x2){
  nor = list()
  for(i in 1:d){
    nor[[i]] = norm0(x1[,i],x2[,i])}
  return(nor)
} 

mat0 <- function(x) matern(x, phi=1, kappa=5/2)
gau0 <- function(x) exp(-x^2)
exp0 <- function(x) exp(-abs(x))

ker.sele <- function(x1,x2,theta,ker){
  x = norm1(x1,x2)
  dis = Map(function(x0,th) x0/th,x,theta)
  R0 = Map(function(x0) ker(x0),dis)
  R = Reduce("*", R0)
  return(R)
}

################################################################################
#### GP ########################################################################
################################################################################

################################################################################
## Our proposed method: NS-TOGP
vec.lab = list()
for(om.lab in 1:dim.mode){
  vec.lab[[om.lab]] = dim.s[om.lab]*(dim.s[om.lab]+1)/2
}
vec.lab[[dim.mode+1]] = d
vec.lab[[dim.mode+2]] = vec.lab[[dim.mode+3]] = 1
group.lab <- unlist(Map(rep, LETTERS[1:length(vec.lab)], unlist(vec.lab)))
dim.hyper.smt = length(group.lab)

lower.th = c(unlist(Map(rep, c(rep(1e-1,dim.mode),1e-3,1e-2,1e-10), unlist(vec.lab))))
upper.th = c(unlist(Map(rep, c(rep(1,dim.mode),10,10,1e-2), unlist(vec.lab))))

B.tuck.smt <- function(y,n,s1,s2,s3){
  y0 = lapply(1:n, function(i) rTensor::as.tensor(y[,,, i]))
  
  mode.unfold <- function(da,k) {
    do.call(cbind, lapply(da, function(x) { k_unfold(x,k)@data}))
  }
  
  U.1 <- svd(mode.unfold(y0, 1))$u[, 1:s1]
  U.2 <- svd(mode.unfold(y0, 2))$u[, 1:s2]
  U.3 <- svd(mode.unfold(y0, 3))$u[, 1:s3]
  
  core.ten <- lapply(y0, function(x) {
    ttl(x, list(t(U.1), t(U.2), t(U.3)), 1:3)
  })
  return(list(core.ten=core.ten, U.tuck=list(U.1=U.1,U.2=U.2,U.3=U.3)))
}
# b.ren = B.smt(y,n,s1,s2,s3)

sig.smt <- function(t,ome){
  O = matrix(0,t,t)
  O[lower.tri(O, diag = T)] <- ome
  return(O)
}

ker.smt <- function(y,n,s1,s2,s3,the){
  the0 = split(the, group.lab)
  
  b.ren = B.tuck.smt(y,n,s1,s2,s3)
  Omega = list(list(s1,the0[[1]]),list(s2,the0[[2]]), list(s3,the0[[3]]))
  sig = lapply(Omega,function(the) sig.smt(the[[1]],the[[2]]))
  
  sig.tuck = Map(function(A,B) A%*%(B%*%t(B))%*%t(A), b.ren$U.tuck, sig)
  return(Reduce(kronecker,sig.tuck))
}
# ker.smt(y,n,s1,s2,s3,c(runif(dim.hyper.smt)))


likeli.smt <- function(x1,x2,y,n,s1,s2,s3,the){
  the0 = split(the, group.lab)
  
  omega = ker.smt(y,n,s1,s2,s3,the)
  k.ini = ker.sele(x1,x2,the0[[dim.mode+1]],mat0)
  k.y = the0[[dim.mode+2]]*kronecker(k.ini,omega)+the0[[dim.mode+3]]*diag(n*dim.h)
  sol.k.y = solve(k.y)
  
  log.likeli = determinant(k.y,logarithm=TRUE)$modulus+t(c(y))%*%sol.k.y%*%c(y)
  return(list(like=log.likeli, the0=the0))
}
# likeli.smt(x,x,y,n,s1,s2,c(runif(dim.hyper.mt)))


smtgp.hat <- function(x.new,x,y,n,n.test,s1,s2,s3,hy){
  x.new = matrix(x.new,n.test,d)
  omega = ker.smt(y,n,s1,s2,s3,unlist(hy))
  
  k.smt.s0 = ker.sele(x,x,hy[[dim.mode+1]],mat0)
  k.smt.s = hy[[dim.mode+2]]*kronecker(k.smt.s0,omega)+hy[[dim.mode+3]]*diag(n*dim.h)
  
  k.smt.10 = ker.sele(x.new,x,hy[[dim.mode+1]],mat0)
  k.smt.1 = hy[[dim.mode+2]]*kronecker(k.smt.10,omega)
  
  k.smt.00 = ker.sele(x.new,x.new,hy[[dim.mode+1]],mat0)
  k.smt.0 = hy[[dim.mode+2]]*kronecker(k.smt.00,omega)
  
  k.oth = k.smt.1%*%solve(k.smt.s)
  
  f.hat = k.oth%*%c(y)
  var.hat = k.smt.0-k.oth%*%t(k.smt.1)
  
  result = list(mean = f.hat, cov = var.hat)
  return(result)
}

EIJ <- function(i,j,l){
  E0 = matrix(0,dim.s[l],dim.s[l]); E0[i,j] = 1
  return(E0)
}

der.l <- function(x1,x2,y,n,s1,s2,s3,the){
  the0 = split(the, group.lab)
  
  ome = list()
  for(i in 1:dim.mode){ome[[i]] = the0[[i]]}
  th = the0[[dim.mode+1]]; sig2 = the0[[dim.mode+2]]; tau2 = the0[[dim.mode+3]]
  
  J <- function(i,l){
    E0 = matrix(0,dim.s[l],dim.s[l]); E0[i,i] = exp(sig.smt(dim.s[l],ome[[l]])[i,i])
    return(E0)
  }
  
  b.ren = B.tuck.smt(y,n,s1,s2,s3)$U.tuck
  Omega = list(list(s1,ome[[1]]),list(s2,ome[[2]]),list(s3,ome[[3]]))
  sig = lapply(Omega,function(the) sig.smt(the[[1]],the[[2]]))
  sig.tuck = Map(function(A,B) A%*%(B%*%t(B))%*%t(A), b.ren, sig)
  omega = Reduce(kronecker,sig.tuck)
  
  k.ini = ker.sele(x1,x2,th,mat0)
  k.y = sig2*kronecker(k.ini,omega)+tau2*diag(n*dim.h)
  sol.k.y = solve(k.y)
  
  al.k = sol.k.y%*%c(y)
  der.l.sig2 = tr(sol.k.y%*%kronecker(k.ini,omega))-t(al.k)%*%kronecker(k.ini,omega)%*%al.k
  der.l.tau2 = tr(sol.k.y)-t(al.k)%*%al.k
  
  der.th = array(jacobian(function(theta) ker.sele(x1,x2,theta,mat0), th),dim=c(n,n,d))
  der.l.th.i <- function(der) sig2*(tr(sol.k.y%*%kronecker(der,omega))-t(al.k)%*%kronecker(der,omega)%*%al.k)
  der.l.th = apply(der.th,3,der.l.th.i)
  
  der.l.phi = list()
  for(l in 1:dim.mode){
    der.l.phi[[l]] = matrix(0, dim.s[l], dim.s[l])
    
    der.l.phi.ij <- function(i,j) b.ren[[l]]%*%(EIJ(i,j,l)%*%t(sig[[l]])+sig[[l]]%*%EIJ(j,i,l))%*%t(b.ren[[l]])
    it1 <- function(der.p){
      list1 = if (l > 1) sig.tuck[1:(l-1)] else 1
      list2 = if (l < dim.mode) sig.tuck[(l+1):dim.mode] else 1
      kro.list = list(sig2*k.ini,Reduce(kronecker,list1),der.p,Reduce(kronecker,list2))
      return(Reduce(kronecker,kro.list))
    } 
    it2 <- function(der.p) tr(sol.k.y%*%it1(der.p))-t(al.k)%*%it1(der.p)%*%al.k
    
    der.l.phi.1 = sapply(c(1:dim.s[l]), function(i) {
      sapply(c(1:i), function(j) it2(der.l.phi.ij(i, j)))
    })
    
    der.l.phi.ii <- function(i) b.ren[[l]]%*%(J(i,l)%*%t(sig[[l]])+sig[[l]]%*%J(i,l))%*%t(b.ren[[l]])
    der.l.phi.dig = apply(as.matrix(c(1:dim.s[l])),1, function(i) it2(der.l.phi.ii(i)))
    
    for (i in 1:dim.s[l]) {
      der.l.phi[[l]][i, 1:i] <- der.l.phi.1[[i]]
    }
    # diag(der.l.phi[[l]]) = der.l.phi.dig   
  }
  
  result = list(der.l.phi=lapply(der.l.phi, function(mat) mat[lower.tri(mat, diag = TRUE)]), 
                der.l.th=der.l.th, der.l.sig2=der.l.sig2, der.l.tau2=der.l.tau2)
  return(result)
}


N = 15; lambda = 0.1
x.train = pre.to.list$x.train 
err.train = pre.to.list$err.train 
f.train = array(unlist(apply(x.train, 1, true.model)), dim=c(dim.f,N))
y.train = f.train+err.train

hyper.smt.train.ini = directL(function(the) likeli.smt(x.train,x.train,y.train,N,s1,s2,s3,the)$like,lower.th,upper.th,control=list(maxeval=1000))$par
opts <- list("algorithm"="NLOPT_LD_LBFGS", "xtol_rel"=1.0e-5)
hyper.smt.train = nloptr(x0=hyper.smt.train.ini,
                         eval_f = function(the) likeli.smt(x.train,x.train,y.train,N,s1,s2,s3,the)$like,
                         eval_grad_f=function(the) unlist(der.l(x.train,x.train,y.train,N,s1,s2,s3,the)),
                         opts=opts,lb = lower.th, ub = upper.th)$solution

likeli.train = -0.5*likeli.smt(x.train,x.train,y.train,N,s1,s2,s3,hyper.smt.train)$like
hy.smt.train = likeli.smt(x.train,x.train,y.train,N,s1,s2,s3,hyper.smt.train)$the0

pre.smt.train = smtgp.hat(x.train,x.train,y.train,N,N,s1,s2,s3,hy.smt.train)
mean.smt.train = pre.smt.train$mean
var.hat.smt.train = pre.smt.train$cov

x.test = pre.to.list$x.test; n.test=5
pre.smt.test =smtgp.hat(x.test,x.train,y.train,N,n.test,s1,s2,s3,hy.smt.train)
mean.smt.test = pre.smt.test$mean
var.hat.smt.test = pre.smt.test$cov

mae.smt = mean(abs((mean.smt.test-c(apply(x.test,1,true.model)))/c(apply(x.test,1,true.model))))
cov.op = sqrt(Matrix::norm(var.hat.smt.test,type="2"))

pre.smt.list = list(nll.smt=likeli.train, x.train=x.train, err.train=err.train, x.test=x.test, mae.smt=mae.smt, cov.op=cov.op)
save(pre.smt.list, file="s1.pre.smt.list.RData")



################################################################################
## Our proposed method: NS-SMTGP-UCB
n = 5*d; m  = 10*d; lambda=0.1; J.for=10

ins.regret.smt = cum.regret.smt = list()
like.re.smt = hyper.smt = lapply(1:J.for, function(x) list())
x0.smt = y0.smt = list(); fhat = lapply(1:J.for, function(x) list())
smtgp.bo = h.smt = list()
mse.x.smt = mae.y.smt = list()
regret.smt = ins.regret.smt = cum.regret.smt = list()
beta.smt = ucb.new.smt = lapply(1:J.for, function(x) list())

for(j.for.smt in 1:J.for){
  x = e1.ini.set$x 
  f = e1.ini.set$f 
  y = e1.ini.set$.for[[j.for.smt]]
  
  ## Setting
  ######################################## BO ####################################
  hyper.smt.old = directL(function(the) likeli.smt(x,x,y,n,s1,s2,s3,the)$like,lower.th,upper.th,control=list(maxeval=1000))$par
  opts <- list("algorithm"="NLOPT_LD_LBFGS", "xtol_rel"=1.0e-5)
  hyper.smt.new = nloptr(x0=hyper.smt.old,
                         eval_f = function(the) likeli.smt(x,x,y,n,s1,s2,s3,the)$like,
                         eval_grad_f=function(the) unlist(der.l(x,x,y,n,s1,s2,s3,the)),
                         opts=opts,lb = lower.th, ub = upper.th)$solution
  
  like.re.smt[[j.for.smt]][[1]] = likeli.smt(x,x,y,n,s1,s2,s3,hyper.smt.new)
  hyper.smt[[j.for.smt]][[1]] = like.re.smt[[j.for.smt]][[1]]$the0
  
  x0.smt[[j.for.smt]] = x; y0.smt[[j.for.smt]] = y; n.smt = n
  x.new.smt = t(as.matrix(x[which.max(apply(x,1,h)),]))
  y.new.smt = f[,,,which.max(apply(x,1,h))]
  
  hyper.smt.ucb = unlist(hyper.smt[[j.for.smt]][[1]]); delta.smt = 0.05
  fhat[[j.for.smt]][[1]] = smtgp.hat(x.new.smt,x,y0.smt[[j.for.smt]],n.smt,1,s1,s2,s3,hyper.smt[[j.for.smt]][[1]])
  
  for(i.smt in 1:m){
    sig2 = hyper.smt[[j.for.smt]][[i.smt]][[2]][2]
    eta = hyper.smt[[j.for.smt]][[i.smt]][[2]][2]
    it1 = Map(function(A) determinant(diag(dim.h)+1/eta*A[[2]],logarithm=TRUE)$modulus,fhat[[j.for.smt]])
    beta.smt[[j.for.smt]][[i.smt]] = sqrt(sig2/eta)*sqrt(2*log(1/delta.smt)+Reduce(sum,it1))
    
    ucb.smt <- function(x.new,beta){
      x.new = matrix(x.new,1,d)
      smtgp.output = smtgp.hat(x.new,x0.smt[[j.for.smt]],y0.smt[[j.for.smt]],n.smt,n.test=1,s1,s2,s3,hyper.smt[[j.for.smt]][[i.smt]])
      ucb = sum(smtgp.output$mean)+sqrt(beta)*sqrt(Matrix::norm(smtgp.output$cov,type="2"))
      return(ucb)
    }
    x.new.smt = bobyqa(x.new.smt, function(x.new) -ucb.smt(x.new,beta.smt[[j.for.smt]][[i.smt]]),
                       lower=lower.x,upper=upper.x)$par
    # x.new.smt = t(t(randomLHS(1,d))*(upper.x-lower.x) + lower.x)
    
    fhat[[j.for.smt]][[i.smt+1]] = smtgp.hat(x.new.smt,x0.smt[[j.for.smt]],y0.smt[[j.for.smt]],n.smt,n.test=1,s1,s2,s3,hyper.smt[[j.for.smt]][[i.smt]])
    y.new.smt = true.model(x.new.smt)+array(rnorm(dim.h, mean=0, sd=lambda), dim=c(dim.f))
    
    ucb.new.smt[[j.for.smt]][[i.smt]] = ucb.smt(x.new.smt,beta.smt[[j.for.smt]][[i.smt]])
    
    x0.smt[[j.for.smt]] = rbind(x0.smt[[j.for.smt]], x.new.smt)
    y0.smt[[j.for.smt]] = abind(y0.smt[[j.for.smt]], y.new.smt, along = 4)
    
    n.smt = n+i.smt
    
    if(i.smt %% 5 == 0){
      hyper.smt.ucb = nloptr(x0=unlist(hyper.smt[[j.for.smt]][[i.smt]]),
                             eval_f = function(the) likeli.smt(x0.smt[[j.for.smt]],x0.smt[[j.for.smt]],y0.smt[[j.for.smt]],n.smt,s1,s2,s3,the)$like,
                             eval_grad_f=function(the) unlist(der.l(x0.smt[[j.for.smt]],x0.smt[[j.for.smt]],y0.smt[[j.for.smt]],n.smt,s1,s2,s3,the)),
                             opts=opts,lb = lower.th, ub = upper.th)$solution
    }else{
      hyper.smt.ucb = hyper.smt.ucb
    }
    
    like.re.smt[[j.for.smt]][[i.smt+1]] = likeli.smt(x0.smt[[j.for.smt]],x0.smt[[j.for.smt]],y0.smt[[j.for.smt]],n.smt,s1,s2,s3,hyper.smt.ucb)
    hyper.smt[[j.for.smt]][[i.smt+1]] = like.re.smt[[j.for.smt]][[i.smt+1]]$the0
    print(i.smt)
  }
  
  smtgp.bo[[j.for.smt]] = Map(function(a) true.model(a),split(x0.smt[[j.for.smt]],row(x0.smt[[j.for.smt]])))
  h.smt[[j.for.smt]] = apply(x0.smt[[j.for.smt]],1,h)
  
  mse.x.smt[[j.for.smt]] = apply(x0.smt[[j.for.smt]],1,function(x) mean((x-x.star)^2))
  mae.y.smt[[j.for.smt]] = unlist(lapply(fhat[[j.for.smt]],function(a) mean(abs((a$mean-c(t.star))/c(t.star)))))
  
  regret.smt[[j.for.smt]] = h.star-unlist(h.smt[[j.for.smt]])
  ins.regret.smt[[j.for.smt]] = h.star-cummax(h.smt[[j.for.smt]])
  cum.regret.smt[[j.for.smt]] = cumsum(ins.regret.smt[[j.for.smt]])
  
  layout(matrix(1, nrow = 1, ncol = 1))
  plot(rep(h.star,(n.smt-n+1)),type="b",lwd=3,lty=1,pch=1,col=1,ylim=c(cummax(h.smt[[j.for.smt]])[n],h.star))
  lines(cummax(h.smt[[j.for.smt]])[n:n.smt],type="b",lwd=3,lty=2,pch=2,col=2)
  
  plot(log(ins.regret.smt[[j.for.smt]]+1e-10)[n:n.smt],type="b",lwd=3,lty=1,pch=1,col=1)
  
  print(j.for.smt)
}



fsmt.ucb.list = list(like.re.smt=like.re.smt, hyper.smt=hyper.smt, 
                     x0.smt=x0.smt, y0.smt=y0.smt, smtgp.bo=smtgp.bo, h.smt=h.smt, 
                     mse.x.smt=mse.x.smt, mae.y.smt=mae.y.smt, 
                     regret.smt=regret.smt, ins.regret.smt=ins.regret.smt, cum.regret.smt=cum.regret.smt,
                     beta.smt=beta.smt, ucb.new.smt=ucb.new.smt)
save(fsmt.ucb.list, file="s1.fsmt.ucb.RData")



























